# Module metadata recorded by the author.
# NOTE(review): the first assignment rebinds the module-level `__file__` name,
# which Python normally sets itself for a module loaded from a file — confirm
# that overriding it is intended.
__file__ = 'A08.py'
__author__ = 'Jerry Liu'
__date__ = '2016-04-14'

"""
    问题：一个HTML文件，找出里面的 正文
    常用库：BeautifulSoup
    安装pillow库：
    1、http://www.lfd.uci.edu/~gohlke/pythonlibs/下载 BeautifulSoup-3.2.1-py2-none-any.whl
    2、使用命令pip install BeautifulSoup-3.2.1-py2-none-any.whl进行安装
"""
from bs4 import BeautifulSoup
import urllib.request

def gethtml(url):
    """Download *url* and print the child nodes of its ``<body>`` element.

    The page is parsed with BeautifulSoup's ``html.parser`` backend and the
    list of direct children of ``<body>`` is written to stdout.

    Args:
        url: Address of the HTML page to fetch.

    Returns:
        None. The result is printed, not returned.

    Raises:
        urllib.error.URLError: Propagated from ``urlopen`` when the page
            cannot be retrieved.
    """
    # The context manager closes the HTTP response even if read() raises.
    with urllib.request.urlopen(url) as response:
        htmlsource = response.read()

    bs = BeautifulSoup(htmlsource, "html.parser")

    # html.parser does not synthesize a <body>; for a document without one,
    # bs.body is None, so fall back to the document's top-level nodes instead
    # of failing with AttributeError.
    root = bs.body if bs.body is not None else bs

    # NOTE(review): the original author left a hint about round-tripping text
    # through encode('GBK', 'ignore').decode('GBK') — presumably a workaround
    # for consoles that cannot print every character; confirm before relying
    # on it.
    print(root.contents)

# Script entry point: when executed directly, run gethtml on a hard-coded URL.
if __name__ == '__main__':
    gethtml(url='http://item.jd.com/1455427.html')
